**************************************************************************************************************************************************
******************* Code for 'Are Investment Tax Breaks Effective: Australian Evidence' *******************
***************************************************** Code for tables of summary statistics - Appendix B ******************************************************************

**************************************************DATE: Jun 2024 ******************************************************************************
*********************************************************************************************************************************************
*** Code structure
*00. Preliminaries and globals
*01. Load and clean data
*02. Make key variables
*03. Summary statistics tables for each policy group

************* 00. Preliminaries **********

* Start from an empty session.
clear all
* Root folders for the input data and for the exported tables.
* Both are blank in this file and must be filled in before running.
global data ""
global savefile ""
* PLUS and PERSONAL system directories (where Stata looks for user-written files).
* Both are blank in this file and must be filled in before running.
sysdir set PLUS ""
sysdir set PERSONAL ""
* Rebuild the index of Mata libraries.
* NOTE(review): presumably needed so libraries under the directories set above are found - confirm.
mata: mata mlib index

************* 01. Load and clean data **********

* Forward slash is a valid directory separator on every platform and matches
* the export paths used later in this file.
use "$data/capex_bit_frame_eginc_COY_20.dta", clear

*****************************************************
* Relabelled copy of quarter: 1 -> 3 "Sep", 2 -> 4 "Dec", 3 -> 1 "Mar", 4 -> 2 "June".
recode quarter (1=3 "Sep") (2=4 "Dec") (3=1 "Mar") (4=2 "June"), gen(CalQuarter)

* Quarterly date built from year and quarter, shifted back two quarters.
* NOTE(review): presumably converts financial-year quarters to calendar quarters - confirm.
gen time = yq(year, quarter)-2
format time %tq
drop if missing(time)

* Entity-type indicators taken from the bit_* flags (1 when the flag equals 1, else 0).
gen company     = (bit_comp_  == 1)
gen individual  = (bit_ind_   == 1)
gen partnership = (bit_part_  == 1)
gen trust       = (bit_trust_ == 1)

* Complements; a missing flag counts as "not 1", so these equal 1 for missing flags too.
gen NotCompany         = (bit_comp_ != 1)
gen NotCompanyNorTrust = (bit_comp_ != 1 & bit_trust_ != 1)

* Observation counts: rows with none of the four entity types, then rows with at least one.
sum firmid if company!=1 & individual!=1 & partnership!=1 & trust!=1
sum firmid if company==1 | individual==1 | partnership==1 | trust==1

* Numeric, value-labelled version of the string variable division.
encode division, g(div)
************* 02. Make key variables **********

*log LHS variables
* Full variable names are spelled out so that nothing depends on variable-name
* abbreviation and the levels match the variables used in the zero tests below.
* NOTE(review): assumes the data hold no separate variables named caeqtot or ce1eqtot - confirm.
g l_caeqtot=ln(caeqtot_capex)
g l_ce1eqtot=ln(ce1eqtot_capex) // ce1 variables are short term expectations - not used
g l_ce2eqtot=ln(ce2eqtot_capex) // ce2 variables are short term expectations - not used


*LHS non-zero firms
* Copy of each series with exact zeros set to missing.
g caeqtot_nz = caeqtot_capex
g ce1eqtot_capex_nz = ce1eqtot_capex
g ce2eqtot_capex_nz = ce2eqtot_capex

replace caeqtot_nz=. if caeqtot_capex==0
replace ce1eqtot_capex_nz = . if ce1eqtot_capex==0
replace ce2eqtot_capex_nz = . if ce2eqtot_capex==0

*dummies for non-zero investment and expectations firms
* Equal to 1 for strictly positive, non-missing values and missing otherwise (never 0).
g inv = 1 if (caeqtot_capex>0 & caeqtot_capex!=.)
g exp_1 = 1 if (ce1eqtot_capex>0 & ce1eqtot_capex!=.)
g exp_2 = 1 if (ce2eqtot_capex>0 & ce2eqtot_capex!=.)

*RHS variables
* Logs of current and lagged income.
gen l_income= ln(income)
gen l_income_l = ln(income_l)
gen l_income_l2 = ln(income_l2)

* Where an _eg income figure is missing, fall back to the corresponding non-_eg figure.
replace income_eg=income if income_eg==.
replace income_l_eg=income_l if income_l_eg==.
replace income_l2_eg=income_l2 if income_l2_eg==.

gen l_income_eg= ln(income_eg)
gen l_income_l_eg = ln(income_l_eg)
gen l_income_l2_eg = ln(income_l2_eg)


* Decile indicators of log income and of c_for_share_lag.
xtile pct_income = l_income, nquantiles(10)
xtile pct_foreign = c_for_share_lag, nquantiles(10)


* Declare the panel (firm by date) so the lag operator can be used.
xtset firmid date

gen fte_l_firm = L.fte
* NOTE(review): fte_l is either a variable already in the data or an abbreviation
* of fte_l_firm created on the line above - confirm which one is intended.
gen l_fte_l = ln(fte_l)  //note  note firm level fte not eg
xtile pct_income_l = l_income_l, nquantiles(10) //remember this is xtile on firm level income

************* 03. Summary statistics by policy group **********

*********************************
**************************************************************************************************************************************************
*2mil group
**************************************************************************************************************************************************

* 1 = income_eg below 2 million, 0 = income_eg strictly between 2 and 5 million,
* missing otherwise (including firms exactly on a threshold).
* Thresholds are written without digit-group spaces: a numeric literal in an
* expression cannot contain blanks.
gen policy_group_2m = .
replace policy_group_2m = 1 if (income_eg < 2000000)
replace policy_group_2m = 0 if (income_eg > 2000000 & income_eg < 5000000)

*Create Log Odds
**************************
* Counts within each policy group x industry x date cell.
bysort policy_group_2m industry date: egen firms_size_ind = total(caeqtot_capex!=.)   // firms with non-missing investment
bysort policy_group_2m industry date: egen invs_size_ind = total(inv)                 // firms with positive investment
bysort policy_group_2m industry date: egen exp1_size_ind = total(exp_1)               // firms with positive ce1 expectations
bysort policy_group_2m industry date: egen exp2_size_ind = total(exp_2)               // firms with positive ce2 expectations

* Shares of the cell's firms (denominator: firms with non-missing investment).
g invs_size_ind_share = invs_size_ind/firms_size_ind
g exp1_size_ind_share = exp1_size_ind/firms_size_ind
g exp2_size_ind_share = exp2_size_ind/firms_size_ind

* Log odds of each share. A share of exactly 1 is assigned the value 4;
* a share of exactly 0 is left missing.
g log_odds = ln(invs_size_ind_share/(1-invs_size_ind_share))
replace log_odds=4 if invs_size_ind_share==1

g log_odds_exp1 = ln(exp1_size_ind_share/(1-exp1_size_ind_share))
replace log_odds_exp1=4 if exp1_size_ind_share==1

g log_odds_exp2 = ln(exp2_size_ind_share/(1-exp2_size_ind_share))
replace log_odds_exp2=4 if exp2_size_ind_share==1

* Dominance measures: share of each policy group x date total held by the largest
* firm (top1sh_*) and by the two largest firms (top2sh_*).
foreach var in caeqtot_nz ce1eqtot_capex_nz income_eg wages fte turnover {
	* Rank 1 = largest value; the unique option breaks ties arbitrarily.
	egen rank_`var' = rank(-`var'), by(policy_group_2m date) unique
	* Dividing by a false (0) condition gives missing, so only the selected ranks enter the total.
	egen top1_`var' = total(`var'/ (rank_`var'==1) ), by(policy_group_2m date)
	egen top2_`var' = total(`var'/ (rank_`var' >=1 & rank_`var' <=2) ), by(policy_group_2m date)
	egen sum_`var' = total(`var'), by(policy_group_2m date)
	gen top1sh_`var' = top1_`var'/sum_`var'
	gen top2sh_`var' = top2_`var'/sum_`var'
}

// Create table of summary variables and export to csv
* Mean, standard deviation and non-missing count by policy group for 44 < date < 77.
* Observations with a missing policy group form their own row.
preserve
collapse (mean)  caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg fte ///
         (sd)    sd_caeqtot_nz=caeqtot_nz sd_log_odds=log_odds sd_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 sd_log_odds_exp1=log_odds_exp1 sd_income_eg=income_eg sd_fte=fte ///
         (count) n_caeqtot_nz=caeqtot_nz n_log_odds=log_odds n_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 n_log_odds_exp1=log_odds_exp1 n_income_eg=income_eg n_fte=fte ///
         if date > 44 & date < 77, by(policy_group_2m)
* The comma option is required: outsheet writes tab-separated text by default.
outsheet using "$savefile/2m.csv", comma replace
restore


* Average dominance shares over the same window.
preserve
collapse (mean) top1sh_caeqtot_nz top1sh_ce1eqtot_capex_nz top1sh_income_eg top1sh_fte ///
                top2sh_caeqtot_nz top2sh_ce1eqtot_capex_nz top2sh_income_eg top2sh_fte ///
         if date > 44 & date < 77, by(policy_group_2m)
outsheet using "$savefile/2m_dominance.csv", comma replace
restore

**********
*2016 policy 2-10m
**************************************************************************************************************************************************

* 1 = income_eg strictly between 2 and 10 million, 0 = strictly between 10 and 20 million,
* missing otherwise (including firms exactly on a threshold).
* Thresholds are written without digit-group spaces: a numeric literal in an
* expression cannot contain blanks.
gen policy_group_2016 = .
replace policy_group_2016 = 1 if (income_eg > 2000000 & income_eg < 10000000)
replace policy_group_2016 = 0 if (income_eg > 10000000 & income_eg < 20000000)

*policy date = 78 to 81
*estimation date = .. to 81
*next policy starts at 89 (i.e. 88 is pre policy)
* Indicator for quarters 1 to 4 of year 2017 (not used further in this file).
gen pol_period_2016 = (year == 2017 & inrange(quarter, 1, 4))


*Create Log Odds
**************************
* Remove the versions built for the previous policy grouping, if present.
capture drop firms_size_ind invs_size_ind exp1_size_ind exp2_size_ind invs_size_ind_share exp1_size_ind_share exp2_size_ind_share log_odds log_odds_exp1 log_odds_exp2

* Counts within each policy group x industry x date cell.
bysort policy_group_2016 industry date: egen firms_size_ind = total(caeqtot_capex!=.)   // firms with non-missing investment
bysort policy_group_2016 industry date: egen invs_size_ind = total(inv)                 // firms with positive investment
bysort policy_group_2016 industry date: egen exp1_size_ind = total(exp_1)               // firms with positive ce1 expectations
bysort policy_group_2016 industry date: egen exp2_size_ind = total(exp_2)               // firms with positive ce2 expectations

* Shares of the cell's firms (denominator: firms with non-missing investment).
g invs_size_ind_share = invs_size_ind/firms_size_ind
g exp1_size_ind_share = exp1_size_ind/firms_size_ind
g exp2_size_ind_share = exp2_size_ind/firms_size_ind

* Log odds of each share. A share of exactly 1 is assigned the value 4;
* a share of exactly 0 is left missing.
g log_odds = ln(invs_size_ind_share/(1-invs_size_ind_share))
replace log_odds=4 if invs_size_ind_share==1

g log_odds_exp1 = ln(exp1_size_ind_share/(1-exp1_size_ind_share))
replace log_odds_exp1=4 if exp1_size_ind_share==1

g log_odds_exp2 = ln(exp2_size_ind_share/(1-exp2_size_ind_share))
replace log_odds_exp2=4 if exp2_size_ind_share==1


* Dominance measures for this grouping, stored with a 2016 suffix: share of each
* policy group x date total held by the largest firm and by the two largest firms.
foreach var in caeqtot_nz ce1eqtot_capex_nz income_eg wages fte turnover {
	* Rank 1 = largest value; the unique option breaks ties arbitrarily.
	egen rank_`var'2016 = rank(-`var'), by(policy_group_2016 date) unique
	* Dividing by a false (0) condition gives missing, so only the selected ranks enter the total.
	egen top1_`var'2016 = total(`var'/ (rank_`var'2016==1) ), by(policy_group_2016 date)
	egen top2_`var'2016 = total(`var'/ (rank_`var'2016 >=1 & rank_`var'2016 <=2) ), by(policy_group_2016 date)
	egen sum_`var'2016 = total(`var'), by(policy_group_2016 date)
	gen top1sh_`var'2016 = top1_`var'2016/sum_`var'2016
	gen top2sh_`var'2016 = top2_`var'2016/sum_`var'2016
}

// Create table of summary variables and export to csv
* Mean, standard deviation and non-missing count by policy group for 70 < date < 88.
* Observations with a missing policy group form their own row.
preserve
collapse (mean)  caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg fte ///
         (sd)    sd_caeqtot_nz=caeqtot_nz sd_log_odds=log_odds sd_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 sd_log_odds_exp1=log_odds_exp1 sd_income_eg=income_eg sd_fte=fte ///
         (count) n_caeqtot_nz=caeqtot_nz n_log_odds=log_odds n_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 n_log_odds_exp1=log_odds_exp1 n_income_eg=income_eg n_fte=fte ///
         if date > 70 & date < 88, by(policy_group_2016)
* The comma option is required: outsheet writes tab-separated text by default.
outsheet using "$savefile/2016.csv", comma replace
restore


* Average dominance shares over the same window. The sources are the 2016-suffixed
* variables built in the loop above; the unsuffixed names belong to the 2m grouping.
* Output column names are kept unsuffixed.
preserve
collapse (mean) top1sh_caeqtot_nz=top1sh_caeqtot_nz2016 top1sh_ce1eqtot_capex_nz=top1sh_ce1eqtot_capex_nz2016 ///
                top1sh_income_eg=top1sh_income_eg2016 top1sh_fte=top1sh_fte2016 ///
                top2sh_caeqtot_nz=top2sh_caeqtot_nz2016 top2sh_ce1eqtot_capex_nz=top2sh_ce1eqtot_capex_nz2016 ///
                top2sh_income_eg=top2sh_income_eg2016 top2sh_fte=top2sh_fte2016 ///
         if date > 70 & date < 88, by(policy_group_2016)
outsheet using "$savefile/2016_dominance.csv", comma replace
restore


**************************************************************************************************************************************************
*2019 policy $10-50m
**************************************************************************************************************************************************

* 1 = income_eg strictly between 10 and 50 million, 0 = strictly between 50 and 60 million,
* missing otherwise (including firms exactly on a threshold).
* Thresholds are written without digit-group spaces: a numeric literal in an
* expression cannot contain blanks.
gen policy_group_2019 = .
replace policy_group_2019 = 1 if (income_eg > 10000000 & income_eg < 50000000)
replace policy_group_2019 = 0 if (income_eg > 50000000 & income_eg < 60000000)


* Indicator for quarter 4 of 2019 and quarters 1 to 3 of 2020 (not used further in this file).
gen pol_period_2019 = ((quarter == 4 & year == 2019) | (inrange(quarter, 1, 3) & year == 2020))


*Create Log Odds
**************************
* Remove the versions built for the previous policy grouping, if present.
capture drop firms_size_ind invs_size_ind exp1_size_ind exp2_size_ind invs_size_ind_share exp1_size_ind_share exp2_size_ind_share log_odds log_odds_exp1 log_odds_exp2

* Counts within each policy group x industry x date cell.
bysort policy_group_2019 industry date: egen firms_size_ind = total(caeqtot_capex!=.)   // firms with non-missing investment
bysort policy_group_2019 industry date: egen invs_size_ind = total(inv)                 // firms with positive investment
bysort policy_group_2019 industry date: egen exp1_size_ind = total(exp_1)               // firms with positive ce1 expectations
bysort policy_group_2019 industry date: egen exp2_size_ind = total(exp_2)               // firms with positive ce2 expectations

* Shares of the cell's firms (denominator: firms with non-missing investment).
g invs_size_ind_share = invs_size_ind/firms_size_ind
g exp1_size_ind_share = exp1_size_ind/firms_size_ind
g exp2_size_ind_share = exp2_size_ind/firms_size_ind

* Log odds of each share. A share of exactly 1 is assigned the value 4;
* a share of exactly 0 is left missing.
g log_odds = ln(invs_size_ind_share/(1-invs_size_ind_share))
replace log_odds=4 if invs_size_ind_share==1

g log_odds_exp1 = ln(exp1_size_ind_share/(1-exp1_size_ind_share))
replace log_odds_exp1=4 if exp1_size_ind_share==1

g log_odds_exp2 = ln(exp2_size_ind_share/(1-exp2_size_ind_share))
replace log_odds_exp2=4 if exp2_size_ind_share==1


* Dominance measures for this grouping, stored with a 2019 suffix: share of each
* policy group x date total held by the largest firm and by the two largest firms.
foreach var in caeqtot_nz ce1eqtot_capex_nz income_eg wages fte turnover {
	* Rank 1 = largest value; the unique option breaks ties arbitrarily.
	egen rank_`var'2019 = rank(-`var'), by(policy_group_2019 date) unique
	* Dividing by a false (0) condition gives missing, so only the selected ranks enter the total.
	egen top1_`var'2019 = total(`var'/ (rank_`var'2019==1) ), by(policy_group_2019 date)
	egen top2_`var'2019 = total(`var'/ (rank_`var'2019 >=1 & rank_`var'2019 <=2) ), by(policy_group_2019 date)
	egen sum_`var'2019 = total(`var'), by(policy_group_2019 date)
	gen top1sh_`var'2019 = top1_`var'2019/sum_`var'2019
	gen top2sh_`var'2019 = top2_`var'2019/sum_`var'2019
}

// Create table of summary variables and export to csv
* Mean, standard deviation and non-missing count by policy group for 80 < date < 92.
* Observations with a missing policy group form their own row.
preserve
collapse (mean)  caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg fte ///
         (sd)    sd_caeqtot_nz=caeqtot_nz sd_log_odds=log_odds sd_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 sd_log_odds_exp1=log_odds_exp1 sd_income_eg=income_eg sd_fte=fte ///
         (count) n_caeqtot_nz=caeqtot_nz n_log_odds=log_odds n_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 n_log_odds_exp1=log_odds_exp1 n_income_eg=income_eg n_fte=fte ///
         if date > 80 & date < 92, by(policy_group_2019)
* The comma option is required: outsheet writes tab-separated text by default.
outsheet using "$savefile/2019.csv", comma replace
restore


* Average dominance shares over the same window. The sources are the 2019-suffixed
* variables built in the loop above; the unsuffixed names belong to the 2m grouping.
* Output column names are kept unsuffixed.
preserve
collapse (mean) top1sh_caeqtot_nz=top1sh_caeqtot_nz2019 top1sh_ce1eqtot_capex_nz=top1sh_ce1eqtot_capex_nz2019 ///
                top1sh_income_eg=top1sh_income_eg2019 top1sh_fte=top1sh_fte2019 ///
                top2sh_caeqtot_nz=top2sh_caeqtot_nz2019 top2sh_ce1eqtot_capex_nz=top2sh_ce1eqtot_capex_nz2019 ///
                top2sh_income_eg=top2sh_income_eg2019 top2sh_fte=top2sh_fte2019 ///
         if date > 80 & date < 92, by(policy_group_2019)
outsheet using "$savefile/2019_dominance.csv", comma replace
restore

**************************************************************************************************************************************************
*2020 policy
**********************************************************************************************************************************************

* Open a log named 2020_actuals in the working directory; any error is ignored
* (for example when a log is already open).
capture log using 2020_actuals, replace

* 1 = income_eg strictly between 50 and 500 million, 0 = strictly between 500 and 600 million,
* missing otherwise (including firms exactly on a threshold).
* Thresholds are written without digit-group spaces: a numeric literal in an
* expression cannot contain blanks.
gen policy_group_2020 = .
replace policy_group_2020 = 1 if (income_eg > 50000000 & income_eg < 500000000)
replace policy_group_2020 = 0 if (income_eg > 500000000 & income_eg < 600000000)


* Indicator for quarters 3, 4 and 1 of year 2020 (not used further in this file).
* NOTE(review): the quarter 1 term uses year 2020; 2021 may have been intended,
* which would make the window three consecutive quarters - confirm.
g pol_period_2020 =  0
replace pol_period_2020 = ((quarter == 3 & year == 2020)| (quarter == 4 & year == 2020) | (quarter == 1 & year == 2020))


*Create Log Odds
**************************
* Remove the versions built for the previous policy grouping, if present.
capture drop firms_size_ind invs_size_ind exp1_size_ind exp2_size_ind invs_size_ind_share exp1_size_ind_share exp2_size_ind_share log_odds log_odds_exp1 log_odds_exp2

* Counts within each policy group x industry x date cell.
bysort policy_group_2020 industry date: egen firms_size_ind = total(caeqtot_capex!=.)   // firms with non-missing investment
bysort policy_group_2020 industry date: egen invs_size_ind = total(inv)                 // firms with positive investment
bysort policy_group_2020 industry date: egen exp1_size_ind = total(exp_1)               // firms with positive ce1 expectations
bysort policy_group_2020 industry date: egen exp2_size_ind = total(exp_2)               // firms with positive ce2 expectations

* Shares of the cell's firms (denominator: firms with non-missing investment).
g invs_size_ind_share = invs_size_ind/firms_size_ind
g exp1_size_ind_share = exp1_size_ind/firms_size_ind
g exp2_size_ind_share = exp2_size_ind/firms_size_ind

* Log odds of each share. A share of exactly 1 is assigned the value 4;
* a share of exactly 0 is left missing.
g log_odds = ln(invs_size_ind_share/(1-invs_size_ind_share))
replace log_odds=4 if invs_size_ind_share==1

g log_odds_exp1 = ln(exp1_size_ind_share/(1-exp1_size_ind_share))
replace log_odds_exp1=4 if exp1_size_ind_share==1

g log_odds_exp2 = ln(exp2_size_ind_share/(1-exp2_size_ind_share))
replace log_odds_exp2=4 if exp2_size_ind_share==1


* Dominance measures for this grouping, stored with a 2020 suffix: share of each
* policy group x date total held by the largest firm and by the two largest firms.
foreach var in caeqtot_nz ce1eqtot_capex_nz income_eg wages fte turnover {
	* Rank 1 = largest value; the unique option breaks ties arbitrarily.
	egen rank_`var'2020 = rank(-`var'), by(policy_group_2020 date) unique
	* Dividing by a false (0) condition gives missing, so only the selected ranks enter the total.
	egen top1_`var'2020 = total(`var'/ (rank_`var'2020==1) ), by(policy_group_2020 date)
	egen top2_`var'2020 = total(`var'/ (rank_`var'2020 >=1 & rank_`var'2020 <=2) ), by(policy_group_2020 date)
	egen sum_`var'2020 = total(`var'), by(policy_group_2020 date)
	gen top1sh_`var'2020 = top1_`var'2020/sum_`var'2020
	gen top2sh_`var'2020 = top2_`var'2020/sum_`var'2020
}



// Create table of summary variables and export to csv
* Mean, standard deviation and non-missing count by policy group for 80 < date < 94.
* Observations with a missing policy group form their own row.
preserve
collapse (mean)  caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg fte ///
         (sd)    sd_caeqtot_nz=caeqtot_nz sd_log_odds=log_odds sd_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 sd_log_odds_exp1=log_odds_exp1 sd_income_eg=income_eg sd_fte=fte ///
         (count) n_caeqtot_nz=caeqtot_nz n_log_odds=log_odds n_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 n_log_odds_exp1=log_odds_exp1 n_income_eg=income_eg n_fte=fte ///
         if date > 80 & date < 94, by(policy_group_2020)
* The comma option is required: outsheet writes tab-separated text by default.
outsheet using "$savefile/2020.csv", comma replace
restore


* Average dominance shares over the same window. The sources are the 2020-suffixed
* variables built in the loop above; the unsuffixed names belong to the 2m grouping.
* Output column names are kept unsuffixed.
preserve
collapse (mean) top1sh_caeqtot_nz=top1sh_caeqtot_nz2020 top1sh_ce1eqtot_capex_nz=top1sh_ce1eqtot_capex_nz2020 ///
                top1sh_income_eg=top1sh_income_eg2020 top1sh_fte=top1sh_fte2020 ///
                top2sh_caeqtot_nz=top2sh_caeqtot_nz2020 top2sh_ce1eqtot_capex_nz=top2sh_ce1eqtot_capex_nz2020 ///
                top2sh_income_eg=top2sh_income_eg2020 top2sh_fte=top2sh_fte2020 ///
         if date > 80 & date < 94, by(policy_group_2020)
outsheet using "$savefile/2020_dominance.csv", comma replace
restore

*************************************************
*2021 policy
*************************************************

* 1 = income_eg strictly between 500 million and 5 billion, 0 = strictly between 5 and 6 billion,
* missing otherwise (including firms exactly on a threshold).
* Thresholds are written without digit-group spaces: a numeric literal in an
* expression cannot contain blanks.
gen policy_group_2021 = .
replace policy_group_2021 = 1 if (income_eg > 500000000 & income_eg < 5000000000)
replace policy_group_2021 = 0 if (income_eg > 5000000000 & income_eg < 6000000000)

* Indicator for quarters 2 to 4 of year 2021 (not used further in this file).
gen pol_period_2021 = (year == 2021 & inrange(quarter, 2, 4))


*Create Log Odds
**************************
* Remove the versions built for the previous policy grouping, if present.
capture drop firms_size_ind invs_size_ind exp1_size_ind exp2_size_ind invs_size_ind_share exp1_size_ind_share exp2_size_ind_share log_odds log_odds_exp1 log_odds_exp2

* Counts within each policy group x industry x date cell.
bysort policy_group_2021 industry date: egen firms_size_ind = total(caeqtot_capex!=.)   // firms with non-missing investment
bysort policy_group_2021 industry date: egen invs_size_ind = total(inv)                 // firms with positive investment
bysort policy_group_2021 industry date: egen exp1_size_ind = total(exp_1)               // firms with positive ce1 expectations
bysort policy_group_2021 industry date: egen exp2_size_ind = total(exp_2)               // firms with positive ce2 expectations

* Shares of the cell's firms (denominator: firms with non-missing investment).
g invs_size_ind_share = invs_size_ind/firms_size_ind
g exp1_size_ind_share = exp1_size_ind/firms_size_ind
g exp2_size_ind_share = exp2_size_ind/firms_size_ind

* Log odds of each share. A share of exactly 1 is assigned the value 4;
* a share of exactly 0 is left missing.
g log_odds = ln(invs_size_ind_share/(1-invs_size_ind_share))
replace log_odds=4 if invs_size_ind_share==1

g log_odds_exp1 = ln(exp1_size_ind_share/(1-exp1_size_ind_share))
replace log_odds_exp1=4 if exp1_size_ind_share==1

g log_odds_exp2 = ln(exp2_size_ind_share/(1-exp2_size_ind_share))
replace log_odds_exp2=4 if exp2_size_ind_share==1

* Dominance measures for this grouping, stored with a 2021 suffix: share of each
* policy group x date total held by the largest firm and by the two largest firms.
foreach var in caeqtot_nz ce1eqtot_capex_nz income_eg wages fte turnover {
	* Rank 1 = largest value; the unique option breaks ties arbitrarily.
	egen rank_`var'2021 = rank(-`var'), by(policy_group_2021 date) unique
	* Dividing by a false (0) condition gives missing, so only the selected ranks enter the total.
	egen top1_`var'2021 = total(`var'/ (rank_`var'2021==1) ), by(policy_group_2021 date)
	egen top2_`var'2021 = total(`var'/ (rank_`var'2021 >=1 & rank_`var'2021 <=2) ), by(policy_group_2021 date)
	egen sum_`var'2021 = total(`var'), by(policy_group_2021 date)
	gen top1sh_`var'2021 = top1_`var'2021/sum_`var'2021
	gen top2sh_`var'2021 = top2_`var'2021/sum_`var'2021
}

* On-screen summaries for 78 <= date <= 100 (a wider window than the exported tables below).
tabstat caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg turnover  fte if date>=78 & date <=100, statistics(mean sd count) by(policy_group_2021)  save
tabstat top1sh*2021 if date>=78 & date <=100, statistics(mean) by(policy_group_2021)  save
tabstat top2sh*2021  if date>=78 & date <=100, statistics(mean) by(policy_group_2021)  save


// Create table of summary variables and export to csv
* Mean, standard deviation and non-missing count by policy group for 90 < date < 99.
* Observations with a missing policy group form their own row.
preserve
collapse (mean)  caeqtot_nz log_odds ce1eqtot_capex_nz log_odds_exp1 income_eg fte ///
         (sd)    sd_caeqtot_nz=caeqtot_nz sd_log_odds=log_odds sd_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 sd_log_odds_exp1=log_odds_exp1 sd_income_eg=income_eg sd_fte=fte ///
         (count) n_caeqtot_nz=caeqtot_nz n_log_odds=log_odds n_ce1eqtot_capex_nz=ce1eqtot_capex_nz ///
                 n_log_odds_exp1=log_odds_exp1 n_income_eg=income_eg n_fte=fte ///
         if date > 90 & date < 99, by(policy_group_2021)
* The comma option is required: outsheet writes tab-separated text by default.
outsheet using "$savefile/2021.csv", comma replace
restore


* Average dominance shares over the same window. The sources are the 2021-suffixed
* variables built in the loop above; the unsuffixed names belong to the 2m grouping.
* Output column names are kept unsuffixed.
preserve
collapse (mean) top1sh_caeqtot_nz=top1sh_caeqtot_nz2021 top1sh_ce1eqtot_capex_nz=top1sh_ce1eqtot_capex_nz2021 ///
                top1sh_income_eg=top1sh_income_eg2021 top1sh_fte=top1sh_fte2021 ///
                top2sh_caeqtot_nz=top2sh_caeqtot_nz2021 top2sh_ce1eqtot_capex_nz=top2sh_ce1eqtot_capex_nz2021 ///
                top2sh_income_eg=top2sh_income_eg2021 top2sh_fte=top2sh_fte2021 ///
         if date > 90 & date < 99, by(policy_group_2021)
outsheet using "$savefile/2021_dominance.csv", comma replace
restore

////////////////////////

* Close the log if one is open; ignored otherwise.
capture log close






















